/*
Assemble relevant state characteristics for both 2000 and 2010 period

Author: GA
Last Modified: 9/19/2019
*/

//obtain normalizing wage from PSID
use "$temp/psid_long_cleaned", replace
su wages [w=weight]
clear
set obs 1
gen mean = `r(mean)'
save "$temp/wage_norm", replace

*********************Costs of Living*********************
//use COLI data
import delimited "$data/COLI/coli_2019.csv", clear
keep v1 v3
ren v1 state_full 
ren v3 index
replace state = subinstr(state, "_", " ", .)
merge 1:1 state_full using "$data/Crosswalks/state_fips_crosswalk", keep(match) nogen
ren index coli_late
drop state_full state
ren statefips stfips
merge 1:1 stfips using "$data/COLI/coli", keep(match) nogen
ren coli coli_early
gen coli_2000 = (coli_late + coli_early)/2
gen coli_2010 = coli_late * (3/4) + coli_early*(1/4)
corr coli_early coli_2000

//normalize 2000 Iowa level
su coli_2000 if stfips == 19
replace coli_2000 = coli_2000 / `r(mean)'
replace coli_2010 = coli_2010 / `r(mean)'
keep stfips coli_2000 coli_2010 coli_early
save "$temp/state_coli_dynamic", replace

//try out RPP sometime
import delimited "$data/BLS_RPP/rpp.csv", clear
destring geofips, replace force
replace geofips =  geofips/1000
keep if linecode==1
drop if geofips==0 | geofips==13
egen rpp = rowmean(v*)
drop v* description linecode geoname
ren geofips stfips
merge 1:1 stfips using "$temp/state_coli_dynamic", keep(match) nogen //strongly correlated with what I use.

*********************School Qualities*********************
use "$data/LOO/onlinedata8", clear //read in
keep cz pop2000 stateabbrv ccd*
replace pop = subinstr(pop, ",", "",.)
destring pop2000, replace
ren ccd_exp govt_spending //renaming
ren ccd s_t_ratio //again
ren stateabbrv state

//deflate: 1996-1997 value is 73.346
replace govt_spending = govt_spending/0.73346

//recode expenditure variable
replace govt_spending = govt_spending*1000 //now per pupil

//normalize
preserve
	use "$temp/wage_norm", clear
	su mean
	local norm `r(mean)'
restore

replace govt = govt/`norm' //normalize by mean earnings
collapse (mean) govt s_t_ratio [fweight = pop2000], by(state) //collapse

//merge on fips code
merge 1:1 state using "$data/Crosswalks/state_fips_crosswalk"
keep statefips govt s_t
ren statefips stfips
drop if stfips == 11 //exclude the usual states

//fill in New Jersey
su gov if stfips == 36 //just use New York to do this for now
replace govt = `r(mean)' if stfips == 34
save "$temp/state_school_chars", replace


*****actual education data
use "$data/School_Finance/finance_eventstudysamp_post90events", clear
keep if fy == 2000 | fy == 2010
duplicates drop fipst fy, force
ren fipst stfips
ren fy year
keep stfips year mean_te_pp_r mean_pup_tch
ren mean_te govt_exp
ren mean_pup s_t_ratio 

//deflate expenditure to 2012
replace govt_exp = govt_exp / 1.01346

****normalize by mean earnings
preserve
	use "$temp/wage_norm", clear
	su mean
	local norm `r(mean)'
restore

replace govt = govt/`norm' //normalize by mean earnings

//reshape
gen temp = govt_exp if year == 2010
bys stfips: egen govt_exp_2010 = mean(temp)
drop temp

gen temp = s_t_ratio if year == 2010
bys stfips: egen s_t_ratio_2010 = mean(temp)
drop temp

ren govt_exp govt_exp_2000
ren s_t_ratio s_t_ratio_2000
drop if year == 2010

//check consistency
merge 1:1 stfips using "$temp/state_school_chars", nogen //all correlations of around 0.9 -- goods!
replace s_t_ratio_2000 = s_t_ratio if s_t_ratio_2000 == .
replace s_t_ratio_2010 = s_t_ratio if s_t_ratio_2010 == .

//fill in Hawaii from CHKS measure, adjusting for difference in means
drop year

su govt_exp_2000
local m1 = `r(mean)'
su govt_exp_2010
local m2 = `r(mean)'
su govt_spending
local m3 = `r(mean)'

replace govt_exp_2000 = govt_spending * (`m1'/`m3') if govt_exp_2000 == .
replace govt_exp_2010 = govt_spending * (`m2'/`m3') if govt_exp_2010 == .

//normalize s_t_ratios
su s_t_ratio_2000
replace s_t_ratio_2000 = s_t_ratio_2000 / `r(mean)'
replace s_t_ratio_2010 = s_t_ratio_2010 / `r(mean)'
drop govt_spending s_t_ratio
save "$temp/state_school_chars_dynamic", replace

**********************Population*********************************
import delimited "$data/Population/state_populations_2000_2010.csv", clear 
drop v1 v5 v6
ren v2 state_full
ren v3 pop_2000
ren v4 pop_2010
replace state_full = subinstr(state_full, " ", "", .)
replace pop_2000 = subinstr(pop_2000, ",", "", .)
replace pop_2010 = subinstr(pop_2010, ",", "", .)
destring pop*, replace
merge 1:1 state_full using "$data/Crosswalks/state_fips_crosswalk", keep(match) nogen
drop if statefips == 11
ren statefips stfips
keep stfips pop*
replace pop_2000 = pop_2000 / 1000000
replace pop_2010 = pop_2010 / 1000000
save "$temp/state_populations_dynamic", replace



exit //cut off here: more to collect later on
**********************Skill Prices*********************************



//end of dofile